library(data.table)
library(DBI)
library(ggplot2)
library(cowplot)
library(grid)
library(lme4)
library(lmerTest)
Sys.setlocale("LC_TIME", "en_US.UTF-8") # Print English date format
[1] "en_US.UTF-8"
en_US.UTF-8
# Sys.setlocale("LC_TIME", "nl_NL.UTF-8") # Print Dutch date format
number_format <- scales::number_format(big.mark = ",", decimal.mark = ".") # Print English number format
# number_format <- scales::number_format(big.mark = ".", decimal.mark = ",") # Print Dutch number format
theme_paper <- theme_classic(base_size = 12) +
theme(axis.text = element_text(colour = "black"),
panel.grid.major.y = element_line(colour = "grey92"))
School closure and opening dates
Sources:
date_schools_closed <- as.POSIXct("2020-03-16")
date_schools_opened <- as.POSIXct("2020-06-02")
Handle database connections
db_connect <- function() {
db <- dbConnect(RSQLite::SQLite(), file.path("..", "data", "noordhoff.sqlite"))
return(db)
}
db_disconnect <- function(db) {
dbDisconnect(db)
}
The database contains all SlimStampen data collected via Noordhoff’s platform in three courses: Stepping Stones (English), Grandes Lignes (French), and Neue Kontakte (German).
Trial-level response data are stored in the responses table. Book information, such as the course year, book title, and chapter, are stored in the book_info table.
responses| Column | Type | Explanation |
|---|---|---|
| date | int | UNIX time stamp [s] |
| user_id | chr | unique user identifier |
| method | chr | course |
| start_time | int | elapsed time since session start [ms] |
| rt | int | response time [ms] |
| duration | int | trial duration [ms] |
| fact_id | int | unique fact identifier (within chapter) |
| correct | int | response accuracy |
| answer | chr | user’s response |
| choices | int | number of answer choices (1 == open response) |
| backspace_used | dbl | user pressed backspace during trial |
| backspace_used_first | dbl | user erased first character of response |
| study | int | trial was a study trial |
| answer_language | chr | language of the answer |
| subsession | int | identifies part within learning session |
| book_info_id | chr | unique identifier of book information |
book_info| Column | Type | Explanation |
|---|---|---|
| book_info_id | chr | unique identifier of book information |
| method_group | chr | year and edition |
| book_title | chr | book title (incl. year, level, edition) |
| book_type | chr | type of book |
| chapter | chr | chapter number and title |
Preview first 10 rows
db <- db_connect()
responses_top <- dbGetQuery(db, "SELECT * FROM responses_noduplicates LIMIT 10")
responses_top
book_info_top <- dbGetQuery(db, "SELECT * FROM book_info LIMIT 10")
book_info_top
db_disconnect(db)
There are several measures of learning performance we can look at. The most straight-forward of these are response accuracy and response time.
Important factors to keep in mind: question type (multiple choice or open answer) and language. Note that we cannot distinguish between NL-X and X-X, since we only know the language of the answer.
db <- db_connect()
correct <- dbGetQuery(db,
"SELECT r.method AS 'method',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.correct AS 'correct',
COUNT(*) AS 'n'
FROM 'responses_noduplicates' r
WHERE r.study == 0
GROUP BY r.method,
DATE(r.date + 3600, 'unixepoch'),
r.user_id,
r.choices > 1,
r.correct"
)
setDT(correct)
db_disconnect(db)
Fill in missing rows (where all trials on a day were correct/incorrect):
correct <- tidyr::complete(correct, tidyr::nesting(method, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct)
correct[, mcq := as.logical(mcq)]
accuracy <- correct[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(method, doy, user, mcq)]
Add a school year column (cutoff date: 1 August):
accuracy[, doy_posix := as.POSIXct(doy)]
accuracy[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Add sensible course names:
accuracy[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Align school years:
accuracy[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
accuracy[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_by_week_and_user <- accuracy[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]
Add question type labels:
accuracy_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
Plot response accuracy by week (mean +/- 1 SE).
p_acc <- ggplot(accuracy_by_week[(course == "English" & mcq == TRUE) | course == "French",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(. ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format(accuracy = 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type.eps", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
db <- db_connect()
correct_strat <- dbGetQuery(db,
"SELECT r.method AS 'method',
r.book_info_id AS 'book_info_id',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.correct AS 'correct',
COUNT(*) AS 'n'
FROM 'responses_noduplicates' r
WHERE r.study == 0
GROUP BY r.method,
r.book_info_id,
DATE(r.date + 3600, 'unixepoch'),
r.user_id,
r.choices > 1,
r.correct"
)
setDT(correct_strat)
db_disconnect(db)
Fill in missing rows (where all trials on a day were correct/incorrect):
correct_strat <- tidyr::complete(correct_strat, tidyr::nesting(method, book_info_id, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct_strat)
correct_strat[, mcq := as.logical(mcq)]
Add book information:
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)
setDT(book_info)
correct_strat[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
Add sensible course names:
correct_strat[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Add a school year column (cutoff date: 1 August):
correct_strat[, doy_posix := as.POSIXct(doy)]
correct_strat[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Simplify level names:
# Keep all distinctions
correct_strat[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
correct_strat[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
correct_strat[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary\n(havo)",
grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
grepl("havo", book_title) ~ "General secondary\n(havo)",
grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
TRUE ~ "Other")]
correct_strat[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
Simplify year names:
correct_strat[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
Consolidate by day:
accuracy_strat <- correct_strat[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(school_year, doy_posix, course, level, year, user, mcq)]
Align school years:
accuracy_strat[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy_strat[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
accuracy_strat[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy_strat[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_strat_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, year, user, mcq)]
accuracy_strat_by_week <- accuracy_strat_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, year, mcq)]
Add question type labels:
accuracy_strat_by_week_and_user[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
accuracy_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many unique users per group?
accuracy_strat_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, level, year, school_year, question_type)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "French" & level != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.4, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_french_level_year.pdf", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_french_level_year.eps", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_french_level_year.png", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "English" & level != "Other" & question_type == "Multiple\nchoice",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line() +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.4, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_english_level_year.pdf", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_english_level_year.eps", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_english_level_year.png", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
accuracy_level_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, user, mcq)]
accuracy_level_by_week <- accuracy_level_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, mcq)]
Add question type labels:
accuracy_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many users in each group?
accuracy_level_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, level, school_year, mcq)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_level <- ggplot(accuracy_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.6, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_level.eps", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
accuracy_year_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, year, user, mcq)]
accuracy_year_by_week <- accuracy_year_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, year, mcq)]
Add question type labels:
accuracy_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many users in each group?
accuracy_year_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, year, school_year, mcq)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_year <- ggplot(accuracy_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(year ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.6, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_year
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_year.eps", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Fit a mixed effects model to the daily accuracy data:
accuracy[, period := dplyr::case_when(
doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown open answer in 19/20
accuracy[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
accuracy[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
accuracy[, mcq := factor(mcq, levels = c(FALSE, TRUE))]
Since we know the number of trials per day and the proportion correct (accuracy), we can use a binomial GLMM:
if(!file.exists("../output/m_acc_fit.rds")) {
m_acc <- glmer(accuracy ~ period*school_year*mcq + (1 | user) + (1 | course),
data = accuracy[(course == "English" & mcq == TRUE) | course == "French",],
family = "binomial",
weights = n,
nAGQ = 0,
control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
saveRDS(m_acc, "../output/m_acc_fit.rds")
} else {
m_acc <- readRDS("../output/m_acc_fit.rds")
}
m_acc_summary <- summary(m_acc)
m_acc_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
Family: binomial ( logit )
Formula: accuracy ~ period * school_year * mcq + (1 | user) + (1 | course)
Data:
accuracy[(course == "English" & mcq == TRUE) | course == "French", ]
Weights: n
Control:
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))
AIC BIC logLik deviance df.resid
4963073 4963232 -2481522 4963045 671406
Scaled residuals:
Min 1Q Median 3Q Max
-23.9963 -0.9227 0.1606 1.0936 11.4371
Random effects:
Groups Name Variance Std.Dev.
user (Intercept) 0.35034 0.5919
course (Intercept) 0.05511 0.2348
Number of obs: 671420, groups: user, 133419; course, 2
Fixed effects:
Estimate Std. Error
(Intercept) 1.631257 0.166016
periodduring-lockdown 0.259227 0.002725
periodpost-lockdown 0.150384 0.004021
school_year18/19 0.117385 0.002566
mcqTRUE 0.960249 0.002088
periodduring-lockdown:school_year18/19 -0.311338 0.004409
periodpost-lockdown:school_year18/19 -0.242816 0.006325
periodduring-lockdown:mcqTRUE -0.328352 0.002881
periodpost-lockdown:mcqTRUE -0.277485 0.004397
school_year18/19:mcqTRUE 0.003357 0.002667
periodduring-lockdown:school_year18/19:mcqTRUE 0.379504 0.004758
periodpost-lockdown:school_year18/19:mcqTRUE 0.335859 0.006970
z value Pr(>|z|)
(Intercept) 9.826 <2e-16 ***
periodduring-lockdown 95.142 <2e-16 ***
periodpost-lockdown 37.399 <2e-16 ***
school_year18/19 45.755 <2e-16 ***
mcqTRUE 459.903 <2e-16 ***
periodduring-lockdown:school_year18/19 -70.613 <2e-16 ***
periodpost-lockdown:school_year18/19 -38.387 <2e-16 ***
periodduring-lockdown:mcqTRUE -113.954 <2e-16 ***
periodpost-lockdown:mcqTRUE -63.110 <2e-16 ***
school_year18/19:mcqTRUE 1.259 0.208
periodduring-lockdown:school_year18/19:mcqTRUE 79.765 <2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE 48.183 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck -0.007
prdpst-lckd -0.005 0.346
schl_y18/19 -0.008 0.452 0.297
mcqTRUE -0.010 0.534 0.352 0.592
prdd-:_18/19 0.005 -0.626 -0.217 -0.505 -0.340
prdp-:_18/19 0.003 -0.229 -0.640 -0.351 -0.233 0.241
prddr-:TRUE 0.007 -0.824 -0.278 -0.412 -0.649 0.516
prdps-:TRUE 0.004 -0.269 -0.824 -0.264 -0.419 0.169
s_18/19:TRU 0.007 -0.409 -0.270 -0.837 -0.697 0.450
prdd-:_18/19:TRUE -0.004 0.506 0.171 0.439 0.405 -0.854
prdp-:_18/19:TRUE -0.003 0.176 0.523 0.298 0.273 -0.190
prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE 0.183
prdps-:TRUE 0.527 0.315
s_18/19:TRU 0.311 0.481 0.310
prdd-:_18/19:TRUE -0.194 -0.612 -0.194 -0.523
prdp-:_18/19:TRUE -0.843 -0.205 -0.634 -0.358
prdd-:_18/19:TRUE
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE
prdps-:TRUE
s_18/19:TRU
prdd-:_18/19:TRUE
prdp-:_18/19:TRUE 0.221
Save coefficients as a table for in the paper:
m_acc_coef <- as.data.frame(m_acc_summary$coefficients)
setDT(m_acc_coef, keep.rownames = TRUE)
m_acc_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
"Period: lockdown",
"Period: post-lockdown",
"School year: 18/19",
"Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19",
"Period: post-lockdown $\\times$ School year: 18/19",
"Period: lockdown $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ Question type: multiple choice",
"School year: 18/19 $\\times$ Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")
# Format p-values
m_acc_coef$`Pr(>|z|)` <- format.pval(m_acc_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_acc_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_acc_coef$`Pr(>|z|)`) # Remove leading zero
cat(knitr::kable(m_acc_coef,
align = c("l","r", "r", "r", "r"),
digits = c(NA, 3, 3, 2, NA),
col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
format = "latex",
booktabs = TRUE,
escape = FALSE),
file = "../output/m_acc_table.tex")
Visualise the model fit:
acc_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
acc_fit <- cbind(acc_fit, accuracy = predict(m_acc, type = "response", re.form = NA, newdata = acc_fit))
acc_fit
ggplot(acc_fit, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
theme_paper
Empirical means:
accuracy_mean <- accuracy[(course == "English" & mcq == TRUE) | course == "French", .(accuracy = sum(accuracy * n)/sum(n)), by = .(period, school_year, mcq, user, course)][, .(accuracy = mean(accuracy), accuracy_sd = sd(accuracy)), by = .(period, school_year, mcq)]
accuracy_mean
ggplot(accuracy_mean, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
theme_paper
db <- db_connect()
rt <- dbGetQuery(db,
"SELECT r.method AS 'method',
r.book_info_id AS 'book_info_id',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.rt AS 'rt'
FROM 'responses_noduplicates' r
WHERE r.study == 0
AND r.correct == 1
"
)
setDT(rt)
db_disconnect(db)
doys <- rt[, .(doy = unique(doy))][, doy_posix := as.POSIXct(doy)][]
doys[, doy_posix_week := cut.POSIXt(as.POSIXct(doy), "week")]
doys[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
doys[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
doys[school_year == "19/20", doy_posix_aligned := doy_posix]
doys[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
doys[, period := dplyr::case_when(
doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown in 19/20
doys[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
doys[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
rt <- rt[doys, on = "doy"]
rt[, mcq := as.factor(as.logical(mcq))]
rt[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Throw out trials with negative RTs (timing errors)
rt <- rt[rt > 0]
rt_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, doy_posix_week)]
rt_by_week <- rt_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, doy_posix_week)]
Overlap the two school years:
rt_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt <- ggplot(rt_by_week[(course == "English" & mcq == TRUE) | course == "French",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(. ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1.7, 3.7)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type.eps", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Add book info:
rt[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
Simplify level names:
# Keep all distinctions
rt[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
rt[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
rt[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary\n(havo)",
grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
grepl("havo", book_title) ~ "General secondary\n(havo)",
grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
TRUE ~ "Other")]
rt[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
Simplify year names:
rt[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
rt_strat_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, year, doy_posix_week)]
rt_strat_by_week <- rt_strat_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, year, doy_posix_week)]
Overlap the two school years:
rt_strat_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_strat_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_strat_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_level_year <- ggplot(rt_strat_by_week[course == "French",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_french_level_year.pdf", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_french_level_year.eps", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_french_level_year.png", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_rt_level_year <- ggplot(rt_strat_by_week[course == "English" & question_type == "Multiple\nchoice" & level != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_english_level_year.pdf", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_english_level_year.eps", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_english_level_year.png", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
rt_level_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, doy_posix_week)]
rt_level_by_week <- rt_level_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, doy_posix_week)]
Overlap the two school years:
rt_level_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_level_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_level_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_level <- ggplot(rt_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 6)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_level.eps", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
rt_year_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, year, doy_posix_week)]
rt_year_by_week <- rt_year_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, year, doy_posix_week)]
Overlap the two school years:
rt_year_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_year_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_year_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_year <- ggplot(rt_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(year ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_year
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_year.eps", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
rt_model_dat <- rt[, .(rt_median = median(rt)), by = .(course, school_year, period, doy_posix, mcq, user)]
Fit a generalised linear mixed effects model (assuming a Gamma distribution for RT and an identity link function; Lo & Andrew, 2015) to the daily median RT:
if(!file.exists("../output/m_rt_fit.rds")) {
m_rt <- glmer(rt_median ~ period*school_year*mcq + (1 | user) + (1 | course),
data = rt_model_dat[(course == "English" & mcq == TRUE) | course == "French",],
family = Gamma(link = "identity"),
nAGQ = 0,
control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
saveRDS(m_rt, "../output/m_rt_fit.rds")
} else {
m_rt <- readRDS("../output/m_rt_fit.rds")
}
m_rt_summary <- summary(m_rt)
m_rt_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
Family: Gamma ( identity )
Formula:
rt_median ~ period * school_year * mcq + (1 | user) + (1 | course)
Data: rt_model_dat[(course == "English" & mcq == TRUE) | course ==
"French", ]
Control:
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))
AIC BIC logLik deviance df.resid
10332190 10332361 -5166080 10332160 669366
Scaled residuals:
Min 1Q Median 3Q Max
-2.732 -0.345 -0.070 0.241 220.533
Random effects:
Groups Name Variance Std.Dev.
user (Intercept) 2.828e+05 531.8041
course (Intercept) 2.852e+02 16.8883
Residual 1.324e-01 0.3639
Number of obs: 669381, groups: user, 133398; course, 2
Fixed effects:
Estimate Std. Error t value
(Intercept) 2133.210 13.019 163.848
periodduring-lockdown 207.475 6.937 29.909
periodpost-lockdown 198.282 10.619 18.673
school_year18/19 28.172 6.564 4.292
mcqTRUE 150.852 5.382 28.028
periodduring-lockdown:school_year18/19 -280.435 11.362 -24.681
periodpost-lockdown:school_year18/19 -294.190 17.057 -17.247
periodduring-lockdown:mcqTRUE -210.118 7.464 -28.151
periodpost-lockdown:mcqTRUE -199.308 11.721 -17.005
school_year18/19:mcqTRUE -62.059 6.894 -9.002
periodduring-lockdown:school_year18/19:mcqTRUE 278.331 12.257 22.708
periodpost-lockdown:school_year18/19:mcqTRUE 315.189 18.735 16.824
Pr(>|z|)
(Intercept) < 2e-16 ***
periodduring-lockdown < 2e-16 ***
periodpost-lockdown < 2e-16 ***
school_year18/19 1.77e-05 ***
mcqTRUE < 2e-16 ***
periodduring-lockdown:school_year18/19 < 2e-16 ***
periodpost-lockdown:school_year18/19 < 2e-16 ***
periodduring-lockdown:mcqTRUE < 2e-16 ***
periodpost-lockdown:mcqTRUE < 2e-16 ***
school_year18/19:mcqTRUE < 2e-16 ***
periodduring-lockdown:school_year18/19:mcqTRUE < 2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck -0.251
prdpst-lckd -0.161 0.334
schl_y18/19 -0.277 0.483 0.311
mcqTRUE -0.335 0.567 0.368 0.619
prdd-:_18/19 0.159 -0.617 -0.207 -0.533 -0.352
prdp-:_18/19 0.105 -0.214 -0.626 -0.356 -0.234 0.229
prddr-:TRUE 0.226 -0.861 -0.281 -0.434 -0.651 0.530
prdps-:TRUE 0.143 -0.275 -0.854 -0.275 -0.413 0.170
s_18/19:TRU 0.245 -0.440 -0.284 -0.879 -0.703 0.483
prdd-:_18/19:TRUE -0.141 0.529 0.173 0.474 0.403 -0.882
prdp-:_18/19:TRUE -0.092 0.176 0.537 0.311 0.263 -0.191
prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE 0.179
prdps-:TRUE 0.534 0.305
s_18/19:TRU 0.322 0.490 0.310
prdd-:_18/19:TRUE -0.195 -0.613 -0.187 -0.539
prdp-:_18/19:TRUE -0.872 -0.194 -0.627 -0.354
prdd-:_18/19:TRUE
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE
prdps-:TRUE
s_18/19:TRU
prdd-:_18/19:TRUE
prdp-:_18/19:TRUE 0.212
Save coefficients as a table for in the paper:
m_rt_coef <- as.data.frame(m_rt_summary$coefficients)
setDT(m_rt_coef, keep.rownames = TRUE)
m_rt_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
"Period: lockdown",
"Period: post-lockdown",
"School year: 18/19",
"Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19",
"Period: post-lockdown $\\times$ School year: 18/19",
"Period: lockdown $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ Question type: multiple choice",
"School year: 18/19 $\\times$ Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")
# Format p-values
m_rt_coef$`Pr(>|z|)` <- format.pval(m_rt_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_rt_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_rt_coef$`Pr(>|z|)`) # Remove leading zero
cat(knitr::kable(m_rt_coef,
align = c("l","r", "r", "r", "r"),
digits = c(NA, 3, 3, 2, NA),
col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
format = "latex",
booktabs = TRUE,
escape = FALSE),
file = "../output/m_rt_table.tex")
Visualise the model fit:
rt_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
rt_fit <- cbind(rt_fit, rt = predict(m_rt, type = "response", re.form = NA, newdata = rt_fit))
rt_fit
ggplot(rt_fit, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(1500, 4000)) +
theme_paper
Empirical means:
rt_mean <- rt_model_dat[(course == "English" & mcq == TRUE) | course == "French", .(rt = mean(rt_median)), by = .(period, school_year, mcq, user, course)][, .(rt = mean(rt), rt_sd = sd(rt)), by = .(period, school_year, mcq, course)]
rt_mean[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
rt_mean
ggplot(rt_mean, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
facet_grid(~ course) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(1500, 3000)) +
theme_paper
p_legend <- get_legend(p_acc)
Warning: Removed 24 row(s) containing missing values (geom_path).
p_acc <- p_acc +
guides(colour = FALSE, fill = FALSE, lty = FALSE)
p_rt <- p_rt +
guides(colour = FALSE, fill = FALSE, lty = FALSE)
Combine plots:
plot_grid(
plot_grid(p_acc, p_rt,
ncol = 1,
labels = c("A", "B")),
p_legend,
rel_widths = c(1, .2)
)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/combi_acc_rt.pdf", width = 9, height = 3.5)
ggsave("../output/combi_acc_rt.eps", width = 9, height = 3.5)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/combi_acc_rt.png", width = 9, height = 3.5)
Get the unique book chapter IDs on each day:
db <- db_connect()
progress <- dbGetQuery(db,
"SELECT DISTINCT r.book_info_id AS 'book_info_id',
r.method AS 'method',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
COUNT(*) AS 'trials'
FROM 'responses_noduplicates' r
GROUP BY r.method,
r.book_info_id,
DATE(r.date + 3600, 'unixepoch');"
)
db_disconnect(db)
setDT(progress)
Join with the book chapter information:
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)
setDT(book_info)
progress[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title", "chapter") := .(i.method_group, i.book_title, i.chapter)]
Add sensible course names:
progress[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Add a school year column (cutoff date: 1 August):
progress[, doy_posix := as.POSIXct(doy)]
progress[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Consolidate count by day and chapter:
progress_by_day <- progress[, .(trials = sum(trials)), by = .(school_year, doy_posix, course, method_group, book_title, chapter)]
Simplify level names:
# Keep all distinctions
progress_by_day[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
progress_by_day[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
progress_by_day[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary (havo)",
grepl("vmbo", book_title) ~ "Pre-vocational (vmbo)",
grepl("havo", book_title) ~ "General secondary (havo)",
grepl("vwo", book_title) ~ "Pre-university (vwo)",
TRUE ~ "Other")]
progress_by_day[, level := factor(level, levels = c("Other", "Pre-vocational (vmbo)", "General secondary (havo)", "Pre-university (vwo)"))]
Simplify year names:
progress_by_day[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
Simplify chapter names:
# In most cases, the chapter name starts with a number
progress_by_day[, chapter_simple := factor(as.numeric(stringr::str_extract(chapter, "^\\d{1,2}")))]
# Remaining cases:
unique(progress_by_day[is.na(chapter_simple),]$chapter)
[1] "BS2 Dienstleistung"
[2] "BS5 Reisen"
[3] "BS1 Familie und Beziehungen"
[4] "BS3 Dienstleistung"
[5] "BS4 Reisen und Verkehr"
[6] "Lernliste Brückenschlag"
[7] "BS2 Freizeit"
[8] "BS1 Schule und Ausbildung"
[9] "Bridging the Gap Year 2"
[10] "Bridging the Gap Year 1"
[11] "Bridging the Gap Exam Preparation"
[12] "Exam Preparation"
[13] "Bridging the Gap mbo"
[14] "Bridging the Gap havo"
BS2 Dienstleistung
BS5 Reisen
BS1 Familie und Beziehungen
BS3 Dienstleistung
BS4 Reisen und Verkehr
Lernliste Brückenschlag
BS2 Freizeit
BS1 Schule und Ausbildung
Bridging the Gap Year 2
Bridging the Gap Year 1
Bridging the Gap Exam Preparation
Exam Preparation
Bridging the Gap mbo
Bridging the Gap havo
# Combine these chapters into an "other" category
progress_by_day[is.na(chapter_simple), chapter_simple := "O"]
Align school years:
progress_by_day[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
progress_by_day[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week and month. Each day is assigned the date of the most recent Monday.
progress_by_day[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
progress_by_day[, doy_posix_aligned_month := cut.POSIXt(doy_posix_aligned, "month")]
Calculate proportions by week and month:
progress_by_week <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_week, course, level, year, chapter_simple)]
progress_by_week[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_week, course, level, year)]
progress_by_month <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_month, course, level, year, chapter_simple)]
progress_by_month[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_month, course, level, year)]
setorder(progress_by_week, chapter_simple)
setorder(progress_by_month, chapter_simple)
p_french_y1 <- ggplot(progress_by_week[course == "French" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_french_y2 <- ggplot(progress_by_week[course == "French" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_french_y3 <- ggplot(progress_by_week[course == "French" & year == "Year 3/4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_progress_french <- plot_grid(p_french_y1, p_french_y2, p_french_y3,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3/4"),
hjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
p_progress_french
ggsave("../output/progress_french.pdf", width = 9, height = 9)
ggsave("../output/progress_french.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_french.png", width = 9, height = 6)
Did the share of trials change between school years? We can simplify the analysis by aggregating over the whole lockdown period.
progress_lockdown <- progress_by_day[between(doy_posix_aligned, date_schools_closed, date_schools_opened), .(trials = sum(trials)), by = .(school_year, course, level, year, chapter_simple)]
# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_lockdown <- as.data.table(tidyr::complete(progress_lockdown, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0)))
progress_lockdown[, prop := trials/sum(trials), by = .(school_year, course, level, year)]
setorder(progress_lockdown, chapter_simple)
ggplot(progress_lockdown[course == "French"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(level ~ year) +
geom_col(colour = NA) +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter",
title = "French") +
theme_paper
Perform a chi-square test of homogeneity to determine whether school years are significantly different.
for (y in sort(unique(progress_lockdown$year))) {
for (l in levels(progress_lockdown$level)) {
d <- progress_lockdown[course == "French" & year == y & level == l]
if (nrow(d) > 0) {
print(paste("French", y, l, collapse= " "))
print(
chisq.test(
dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
)
)
}
}
}
[1] "French Year 1 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 26799, df = 5, p-value < 2.2e-16
[1] "French Year 1 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 229477, df = 5, p-value < 2.2e-16
[1] "French Year 1 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 40312, df = 5, p-value < 2.2e-16
[1] "French Year 2 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 13882, df = 5, p-value < 2.2e-16
[1] "French Year 2 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 50486, df = 5, p-value < 2.2e-16
[1] "French Year 2 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 46882, df = 5, p-value < 2.2e-16
[1] "French Year 3/4 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 30269, df = 7, p-value < 2.2e-16
[1] "French Year 3/4 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 23169, df = 5, p-value < 2.2e-16
[1] "French Year 3/4 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 21545, df = 5, p-value < 2.2e-16
Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).
Visualise the change between school years:
progress_lockdown[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(x = chapter_simple, y = (prop_change * 100), colour = chapter_simple, group = school_year)) +
facet_grid(level ~ year, scales = "free_x") +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(xend = chapter_simple), yend = 0) +
geom_point() +
scale_y_continuous(limits = c(-25, 25)) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)",
title = "French") +
theme_paper
Are these changes really important? We may expect a certain amount of fluctuation between any pair of school years. We don’t have data from before the 18/19 school year, but we can look at how the magnitude of changes during the lockdown period compares to changes earlier in the school year.
To keep things as comparable as possible, use a sliding time window with the same size as the lockdown period:
time_window <- as.numeric(round(date_schools_opened - date_schools_closed))
time_window
[1] 78
date_range <- sort(unique(progress_by_day$doy_posix_aligned))
date_range <- date_range[date_range < date_schools_closed]
prop_change_window <- data.table()
for (i in 1:(length(date_range) - as.numeric(time_window))) {
d <- date_range[i:(i + time_window - 1)]
progress_window <- progress_by_day[course %in% c("French", "English") & doy_posix_aligned %in% d,
.(trials = sum(trials)),
by = .(school_year, course, level, year, chapter_simple)]
# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_window <- as.data.table(tidyr::complete(progress_window, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0)))
progress_window[, prop := trials/sum(trials), by = .(school_year, course, level, year)]
progress_window[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
prop_change_window <- rbind(prop_change_window, progress_window[school_year == "19/20",][,window := i][,date_min := min(d)][,date_max := max(d)])
}
The density of year-to-year changes can be visualised by time window:
ggplot(prop_change_window, aes(x = prop_change * 100, y = window, group = window)) +
ggridges::geom_density_ridges(alpha = 0.1, scale = 25, fill = NA) +
labs(x = "Change in trial share\n(percentage points)",
y = "Time window") +
theme_paper
Picking joint bandwidth of 0.584
Compare the aggregated density to the changes during the lockdown period:
prop_change_combined <- rbind(prop_change_window[, period := "Pre-lockdown"], progress_lockdown[course %in% c("French", "English") & school_year == "19/20", period := "Lockdown"], fill = TRUE)
prop_change_combined[, period := factor(period, levels = c("Pre-lockdown", "Lockdown"))]
ggplot(prop_change_combined, aes(x = prop_change, colour = period)) +
geom_density() +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = "Change in trial share\n(percentage points)",
y = "Density",
colour = NULL) +
theme_paper
prop_change_sd <- prop_change_window[, .(sd = sd(prop_change) * 100), by = .(course, year, level)]
Add boundaries based on the typical spread to the change plot:
p_change_french <- ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(colour = chapter_simple)) +
facet_grid(year ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
scale_y_continuous(breaks = c(-20, 0, 20)) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)") +
theme_paper +
theme(panel.grid.major.y = element_blank())
p_change_french
ggsave("../output/progress_change_french.pdf", width = 5, height = 4)
ggsave("../output/progress_change_french.eps", width = 5, height = 4)
ggsave("../output/progress_change_french.png", width = 9, height = 3)
Make a combination plot for in the paper:
plot_grid(p_french_y1, p_french_y2, p_french_y3, p_change_french,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3/4", "Change"),
rel_heights = c(1, 1, 1, 1.5),
hjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
ggsave("../output/progress_combi_french.pdf", width = 9, height = 9)
ggsave("../output/progress_combi_french.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_french.png", width = 9, height = 9)
p_change_french_y1 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 1"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_french_y2 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 2"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_french_y3 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 3/4"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
filler_plot <- qplot() +
theme_nothing() +
theme(panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
plot_grid(
filler_plot,
p_french_y1, filler_plot, p_change_french_y1, filler_plot,
p_french_y2, filler_plot, p_change_french_y2, filler_plot,
p_french_y3, filler_plot, p_change_french_y3,
ncol = 1,
align = "hv", axis = "tblr",
labels = c(NA,
"Year 1", NA, NA, NA,
"Year 2", NA, NA, NA,
"Year 3/4", NA, NA),
rel_heights = c(.1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75),
hjust = -0.1,
vjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_french.pdf", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_french.eps", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_alt_french.png", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
NOTE: chapters without a number (Bridging the Gap, Exam Preparation) are shown as “O” in the plot. They don’t seem to fit neatly in the chapter sequence, so I’m grouping them together.
p_english_y1 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y2 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y3 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 3"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y4 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_progress_english <- plot_grid(p_english_y1, p_english_y2, p_english_y3, p_english_y4,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3", "Year 4"),
hjust = -0.1,
scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 69 rows containing missing values (position_stack).
Warning: Removed 60 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
p_progress_english
ggsave("../output/progress_english.pdf", width = 9, height = 9)
ggsave("../output/progress_english.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_english.png", width = 9, height = 9)
Did the share of trials change between school years?
ggplot(progress_lockdown[course == "English" & level != "Other"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(level ~ year) +
geom_col(colour = NA) +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter",
title = "English") +
theme_paper
Change between school years:
p_change_english <- ggplot(progress_lockdown[school_year == "19/20" & course == "English" & level != "Other"], aes(colour = chapter_simple)) +
facet_grid(year ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)") +
theme_paper +
theme(panel.grid.major.y = element_blank())
p_change_english
ggsave("../output/progress_change_english.pdf", width = 9, height = 6)
ggsave("../output/progress_change_english.eps", width = 9, height = 6)
ggsave("../output/progress_change_english.png", width = 9, height = 6)
Perform a chi-square test of homogeneity to determine whether school years are significantly different.
for (y in sort(unique(progress_lockdown$year))) {
for (l in levels(progress_lockdown$level)) {
d <- progress_lockdown[course == "English" & year == y & level == l]
if (nrow(d) > 0) {
print(paste("English", y, l, collapse= " "))
print(
chisq.test(
dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
)
)
}
}
}
[1] "English Year 1 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 1902.8, df = 7, p-value < 2.2e-16
[1] "English Year 1 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 59650, df = 7, p-value < 2.2e-16
[1] "English Year 1 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 236327, df = 7, p-value < 2.2e-16
[1] "English Year 1 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 47589, df = 7, p-value < 2.2e-16
[1] "English Year 2 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 4117.3, df = 7, p-value < 2.2e-16
[1] "English Year 2 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 26122, df = 8, p-value < 2.2e-16
[1] "English Year 2 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 16303, df = 8, p-value < 2.2e-16
[1] "English Year 2 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 84027, df = 7, p-value < 2.2e-16
[1] "English Year 3 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 16118, df = 6, p-value < 2.2e-16
[1] "English Year 3 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 20169, df = 6, p-value < 2.2e-16
[1] "English Year 3 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 60993, df = 8, p-value < 2.2e-16
[1] "English Year 3 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 41120, df = 7, p-value < 2.2e-16
[1] "English Year 4 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 20187, df = 5, p-value < 2.2e-16
Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).
Make a combination plot for in the paper:
progress_lockdown_english <- progress_lockdown[level != "Other" & school_year == "19/20" & course == "English"]
progress_lockdown_english[, level := factor(level)]
p_change_english_y1 <- ggplot(progress_lockdown_english[year == "Year 1"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y2 <- ggplot(progress_lockdown_english[year == "Year 2"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y3 <- ggplot(progress_lockdown_english[year == "Year 3"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y4 <- ggplot(progress_lockdown_english[year == "Year 4"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
filler_plot <- qplot() +
theme_nothing() +
theme(panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
plot_grid(
filler_plot,
p_english_y1, filler_plot, p_change_english_y1, filler_plot,
p_english_y2, filler_plot, p_change_english_y2, filler_plot,
p_english_y3, filler_plot, p_change_english_y3, filler_plot,
p_english_y4, filler_plot, p_change_english_y4,
ncol = 1,
align = "hv", axis = "tblr",
labels = c(NA,
"Year 1", NA, NA, NA,
"Year 2", NA, NA, NA,
"Year 3", NA, NA, NA,
"Year 4", NA, NA),
rel_heights = c(.1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75),
hjust = -0.1,
vjust = -0.1,
scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 69 rows containing missing values (position_stack).
Warning: Removed 60 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_english.pdf", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_english.eps", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_alt_english.png", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=nl_NL.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=nl_NL.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] lmerTest_3.1-0 lme4_1.1-21 Matrix_1.2-18 cowplot_0.9.4
[5] ggplot2_3.3.2 DBI_1.1.0 data.table_1.13.6
loaded via a namespace (and not attached):
[1] tidyselect_0.2.5 xfun_0.21 purrr_0.3.2
[4] splines_3.6.3 lattice_0.20-41 colorspace_1.4-1
[7] vctrs_0.2.2 htmltools_0.3.6 viridisLite_0.3.0
[10] yaml_2.2.0 blob_1.2.1 rlang_0.4.10
[13] pillar_1.4.2 nloptr_1.2.1 glue_1.3.1
[16] withr_2.3.0 bit64_0.9-7 plyr_1.8.4
[19] lifecycle_0.1.0 stringr_1.4.0 munsell_0.5.0
[22] gtable_0.3.0 evaluate_0.14 memoise_1.1.0
[25] labeling_0.3 knitr_1.23 Rcpp_1.0.6
[28] scales_1.0.0 jsonlite_1.6 bit_1.1-14
[31] digest_0.6.19 stringi_1.4.3 dplyr_0.8.3
[34] numDeriv_2016.8-1.1 tools_3.6.3 magrittr_1.5
[37] tibble_2.1.3 RSQLite_2.2.0 crayon_1.3.4
[40] tidyr_1.0.0 pkgconfig_2.0.2 MASS_7.3-51.4
[43] ellipsis_0.3.0 ggridges_0.5.1 assertthat_0.2.1
[46] minqa_1.2.4 rmarkdown_2.6 R6_2.4.0
[49] boot_1.3-25 nlme_3.1-149 compiler_3.6.3